{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Machine Learning Homework 1\n",
    "\n",
    "### Task 2\n",
    "\n",
    "MultiBoosting 算法[2] 将AdaBoost 作为Bagging 的基学习器，Iterative Bagging 算法[1] 则是将Bagging 作为AdaBoost 的基学习器。试比较二者的优缺点。\n",
    "\n",
    "Author: Zhilong Hong"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Import part\n",
    "import time\n",
    "import pandas as pd\n",
    "from scipy.stats import uniform\n",
    "\n",
    "from multiboost import MultiBoostClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "from sklearn.ensemble import BaggingClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score\n",
    "from evaluation import ClassifierEvaluator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read data from file\n",
    "\n",
    "def convert_rank_to_int(rank):\n",
    "    return ord(rank) - 96\n",
    "\n",
    "\n",
    "# Parse every comma-separated record of krkopt.data into one dict per row.\n",
    "# The with-block guarantees the file handle is closed, even if a line fails to parse.\n",
    "records = []\n",
    "with open(\"./dataset/krkopt.data\", 'r') as f:\n",
    "    for line in f:\n",
    "        line = line.strip()\n",
    "        if not line:\n",
    "            continue  # skip blank lines instead of crashing in ord('')\n",
    "        data = line.split(',')\n",
    "        records.append({\n",
    "            # File letters become 1-based integers; rank digits are cast to int so\n",
    "            # every feature column is numeric rather than a string.\n",
    "            \"white-king-file\": convert_rank_to_int(data[0]),\n",
    "            \"white-king-rank\": int(data[1]),\n",
    "            \"white-rook-file\": convert_rank_to_int(data[2]),\n",
    "            # NOTE: \"root\" (sic) is kept so the resulting column name is unchanged.\n",
    "            \"white-root-rank\": int(data[3]),\n",
    "            \"black-king-file\": convert_rank_to_int(data[4]),\n",
    "            \"black-king-rank\": int(data[5]),\n",
    "            # Class label, kept as the raw string read from the file.\n",
    "            \"optimal-depth-of-win\": data[6]\n",
    "        })\n",
    "data_set = pd.DataFrame.from_dict(records)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Prepare dataset: split the frame into a feature matrix and a label vector,\n",
    "# then hold out 20% of the rows for testing.\n",
    "\n",
    "# Optional experiment: uncomment to keep only two of the classes.\n",
    "#data_set=data_set[data_set['optimal-depth-of-win'].isin(['fourteen','draw'])]\n",
    "\n",
    "target_col = \"optimal-depth-of-win\"\n",
    "x = data_set[[col for col in data_set.columns if col != target_col]]\n",
    "y = data_set[target_col].values  # 1-D array of class labels\n",
    "\n",
    "# A fixed seed makes the split, and the scores computed from it, reproducible\n",
    "# across kernel restarts.\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.844262295082\n",
      "Wall time: 120 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# AdaBoost over depth-unlimited decision trees; accuracy on the held-out split.\n",
    "base_tree = DecisionTreeClassifier(max_depth=None)\n",
    "clf = AdaBoostClassifier(base_estimator=base_tree, n_estimators=100)\n",
    "clf.fit(x_train, y_train)\n",
    "y_pred = clf.predict(x_test)\n",
    "score = accuracy_score(y_test, y_pred)\n",
    "print(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.870990734141\n",
      "Wall time: 6.86 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# MultiBoost over depth-unlimited decision trees; accuracy on the held-out split.\n",
    "base_tree = DecisionTreeClassifier(max_depth=None)\n",
    "clf = MultiBoostClassifier(base_estimator=base_tree, n_estimators=100)\n",
    "clf.fit(x_train, y_train)\n",
    "y_pred = clf.predict(x_test)\n",
    "score = accuracy_score(y_test, y_pred)\n",
    "print(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.868852459016\n",
      "Wall time: 2.67 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# Bagging over depth-unlimited decision trees; accuracy on the held-out split.\n",
    "base_tree = DecisionTreeClassifier(max_depth=None)\n",
    "clf = BaggingClassifier(base_estimator=base_tree, n_estimators=100)\n",
    "clf.fit(x_train, y_train)\n",
    "y_pred = clf.predict(x_test)\n",
    "score = accuracy_score(y_test, y_pred)\n",
    "print(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [03:37<00:00, 21.70s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bias: 0.151714736282 Var: 2.22779479009e-06\n"
     ]
    }
   ],
   "source": [
    "# Bias/variance of MultiBoost as reported by ClassifierEvaluator on the full data set.\n",
    "# NOTE(review): how the evaluator resamples is defined in evaluation.py, not visible here.\n",
    "base_tree = DecisionTreeClassifier(max_depth=None)\n",
    "clf = MultiBoostClassifier(base_estimator=base_tree, n_estimators=100)\n",
    "e = ClassifierEvaluator(clf)\n",
    "e.fit(x, y)\n",
    "bias, var = e.bias(), e.var()\n",
    "print('Bias:', bias, 'Var:', var)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  3.70it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bias: 0.187108527382 Var: 5.43669471653e-06\n"
     ]
    }
   ],
   "source": [
    "# Bias/variance of AdaBoost as reported by ClassifierEvaluator on the full data set.\n",
    "# NOTE(review): how the evaluator resamples is defined in evaluation.py, not visible here.\n",
    "base_tree = DecisionTreeClassifier(max_depth=None)\n",
    "clf = AdaBoostClassifier(base_estimator=base_tree, n_estimators=100)\n",
    "e = ClassifierEvaluator(clf)\n",
    "e.fit(x, y)\n",
    "bias, var = e.bias(), e.var()\n",
    "print('Bias:', bias, 'Var:', var)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:11<00:00,  7.19s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bias: 0.153726299319 Var: 5.31041334203e-06\n"
     ]
    }
   ],
   "source": [
    "# Bias/variance of Bagging as reported by ClassifierEvaluator on the full data set.\n",
    "# NOTE(review): how the evaluator resamples is defined in evaluation.py, not visible here.\n",
    "base_tree = DecisionTreeClassifier(max_depth=None)\n",
    "clf = BaggingClassifier(base_estimator=base_tree, n_estimators=100)\n",
    "e = ClassifierEvaluator(clf)\n",
    "e.fit(x, y)\n",
    "bias, var = e.bias(), e.var()\n",
    "print('Bias:', bias, 'Var:', var)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
